#Load packages
library(dplyr)
library(fabricatr)
library(estimatr)

#Simulations for cross-sectional version
#Setup
set.seed(100)
nsims <- 500
kmin <- 5
kmax <- 50
k <- rep(seq.int(from = kmin, to = kmax), each = nsims)
sim <- rep(seq(nsims), times = length(seq(kmin:kmax)))
ols_error <- NA
modelG_error <- NA
model_g_error <- NA
results_cs <- tibble(k, sim, ols_error, modelG_error, model_g_error)
results_cs <- results_cs %>% mutate_if(is.logical,as.numeric)


for(k in kmin:kmax){
for (i in 1:nsims){
#Draw data
df <- fabricate(
  region = add_level(N = k, Z = rnorm(N)),
  unit = add_level(N = 5, confound = rnorm(N),# 5 units per region
                    D = Z + confound,
                    Y = D + confound + rnorm(N))
)
#Create instrument (G) and leave-one-out version (g)
df <- df %>% group_by(region) %>% mutate(G = mean(D))
df <- df %>% mutate(g = 5*G - D)
#Run models
ols <- lm_robust(Y ~ D, data = df, clusters = region)
modelG <- iv_robust(Y ~ D | G, data = df, clusters = region)
model_g <- iv_robust(Y ~ D | g, data = df, clusters = region)
#Save results_cs
myrow <- (k-kmin)*nsims + i
results_cs[myrow, "ols_error"] <- ols$coefficients[2] - 1
results_cs[myrow, "modelG_error"] <- modelG$coefficients[2] - 1
results_cs[myrow, "model_g_error"] <- model_g$coefficients[2] - 1
}
}
#save(results_cs, file = "results_cs.RData")


#Simulations for panel data version
#Setup
set.seed(100)
nsims <- 500
kmin <- 5
kmax <- 50
k <- rep(seq.int(from = kmin, to = kmax), each = nsims)
sim <- rep(seq(nsims), times = length(seq(kmin:kmax)))
fe_error <- NA
modelG_error <- NA
model_g_error <- NA
results_panel <- tibble(k, sim, fe_error, modelG_error, model_g_error)
results_panel <- results_panel %>% mutate_if(is.logical,as.numeric)

for(k in kmin:kmax){
  for (i in 1:nsims){
    #Draw data
    df <- fabricate(
      unit = add_level(N = 50, country_fe = rnorm(N)), #50 units per year
      period = add_level(N = k, Z = rnorm(N), nest = FALSE),
      obs = cross_levels(
        by = join(unit, period),
        C = rnorm(N),
        D = Z + C + country_fe,
        Y = D + C + country_fe + rnorm(N)
      ))
    #Create instrument (G) and leave-one-out version (g)
    df <- df %>% group_by(period) %>% mutate(G = mean(D))
    df <- df %>% mutate(g = 50*G - D)
    #Run models
    fe <- lm_robust(Y ~ D + as.factor(unit) + as.factor(period), data = df, clusters = unit)
    modelG <- iv_robust(Y ~ D + as.factor(unit) | G + as.factor(unit), data = df, clusters = unit)
    model_g <- iv_robust(Y ~ D + as.factor(unit) | g + as.factor(unit), data = df, clusters = unit)
    #Save results_panel
    myrow <- (k-kmin)*nsims + i
    results_panel[myrow, "fe_error"] <- fe$coefficients[2] - 1
    results_panel[myrow, "modelG_error"] <- modelG$coefficients[2] - 1
    results_panel[myrow, "model_g_error"] <- model_g$coefficients[2] - 1
  }
}
#save(results_panel, file = "results_panel.RData")



